#include "stdafx.h"

#include "hnum_pdsp_defs.h"
namespace harlinn
{
    namespace numerics
    {
        namespace SuperLU
        {
            namespace Double
            {

                void
                pdgstrf_thread_finalize(pdgstrf_threadarg_t *pdgstrf_threadarg, 
			                pxgstrf_shared_t *pxgstrf_shared,
			                SuperMatrix *A, int *perm_r,
			                SuperMatrix *L, SuperMatrix *U
			                )
                {
                /*
                 * -- SuperLU MT routine (version 2.0) --
                 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
                 * and Xerox Palo Alto Research Center.
                 * September 10, 2007
                 *
                 *
                 * Purpose
                 * =======
                 * 
                 * pdgstrf_thread_finalize() performs cleanups after the multithreaded 
                 * factorization pdgstrf_thread(). It sets up the L and U data
                 * structures, and deallocats the storage associated with the structures
                 * pxgstrf_shared and pdgstrf_threadarg.
                 *
                 * Arguments
                 * =========
                 *
                 * pdgstrf_threadarg (input) pdgstrf_threadarg_t*
                 *          The structure contains the parameters to each thread.
                 *
                 * pxgstrf_shared (input) pxgstrf_shared_t*
                 *          The structure contains the shared task queue, the 
                 *          synchronization variables, and the L and U data structures.
                 *
                 * A        (input) SuperMatrix*
                 *	    Original matrix A, permutated by columns, of dimension
                 *          (A->nrow, A->ncol). The type of A can be:
                 *          Stype = NCP; Dtype = _D; Mtype = GE.
                 *
                 * perm_r   (input) int*, dimension A->nrow
                 *          Row permutation vector which defines the permutation matrix Pr,
                 *          perm_r[i] = j means row i of A is in position j in Pr*A.
                 *
                 * L        (output) SuperMatrix*
                 *          The factor L from the factorization Pr*A=L*U; use compressed row 
                 *          subscripts storage for supernodes, i.e., L has type: 
                 *          Stype = SCP, Dtype = _D, Mtype = TRLU.
                 *
                 * U        (output) SuperMatrix*
                 *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
                 *          storage scheme, i.e., U has type:
                 *          Stype = NCP, Dtype = _D, Mtype = TRU.
                 *
                 *
                 */
                    register int nprocs, n, i, iinfo;
                    int       nnzL, nnzU;
                    superlumt_options_t *superlumt_options;
                    GlobalLU_t *Glu;
                    extern ExpHeader *dexpanders;

                    n = A->ncol;
                    superlumt_options = pdgstrf_threadarg->superlumt_options;
                    Glu = pxgstrf_shared->Glu;
                    Glu->supno[n] = Glu->nsuper;

                    countnz(n, pxgstrf_shared->xprune, &nnzL, &nnzU, Glu);
                    fixupL(n, perm_r, Glu);
    
                #ifdef COMPRESS_LUSUP
                    compressSUP(n, pxgstrf_shared->Glu);
                #endif

                    if ( superlumt_options->refact == YES ) {
                        /* L and U structures may have changed due to possibly different
	                   pivoting, although the storage is available. */
                        ((SCPformat *)L->Store)->nnz = nnzL;
	                ((SCPformat *)L->Store)->nsuper = Glu->supno[n];
	                ((NCPformat *)U->Store)->nnz = nnzU;
                    } else {
	                dCreate_SuperNode_Permuted(L, A->nrow, A->ncol, nnzL, Glu->lusup,
				                   Glu->xlusup, Glu->xlusup_end, 
				                   Glu->lsub, Glu->xlsub, Glu->xlsub_end,
				                   Glu->supno, Glu->xsup, Glu->xsup_end,
				                   SLU_SCP, SLU_D, SLU_TRLU);
	                dCreate_CompCol_Permuted(U, A->nrow, A->ncol, nnzU, Glu->ucol,
				                 Glu->usub, Glu->xusub, Glu->xusub_end,
				                 SLU_NCP, SLU_D, SLU_TRU);
                    }

                    /* Combine the INFO returned from individual threads. */
                    iinfo = 0;
                    nprocs = superlumt_options->nprocs;
                    for (i = 0; i < nprocs; ++i) {
                        if ( pdgstrf_threadarg[i].info ) {
	                    if (iinfo) iinfo=SUPERLU_MIN(iinfo, pdgstrf_threadarg[i].info);
	                    else iinfo = pdgstrf_threadarg[i].info;
	                }
                    }
                    *pxgstrf_shared->info = iinfo;

                #if ( DEBUGlevel>=2 )
                    printf("Last nsuper %d\n", Glu->nsuper);
                    QueryQueue(&pxgstrf_shared->taskq);
                    PrintGLGU(n, pxgstrf_shared->xprune, Glu);
                    PrintInt10("perm_r", n, perm_r);
                    PrintInt10("inv_perm_r", n, pxgstrf_shared->inv_perm_r);
                #endif

                    /* Deallocate the storage used by the parallel scheduling algorithm. */
                    ParallelFinalize(pxgstrf_shared);
                    SUPERLU_FREE(pdgstrf_threadarg);
                    SUPERLU_FREE(pxgstrf_shared->inv_perm_r);
                    SUPERLU_FREE(pxgstrf_shared->inv_perm_c);
                    SUPERLU_FREE(pxgstrf_shared->xprune);
                    SUPERLU_FREE(pxgstrf_shared->ispruned);
                    SUPERLU_FREE(dexpanders);
                    dexpanders = 0;

                #if ( DEBUGlevel>=1 )
                    printf("** pdgstrf_thread_finalize() called\n");
                #endif
                }

            };
        };
    };
};
